PH Covid-19 Data Analysis

By: Marlon Teodosio
last update: May 18, 2020

In [861]:
from IPython.display import HTML

HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }

  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Out[861]:
In [863]:
import plotly.express as px
import plotly.io as pio
pio.templates

import plotly.graph_objects as go
import plotly.offline as py
py.init_notebook_mode()
# pd.options.display.float_format = '{:,.1f}'.format

from plotly.subplots import make_subplots

# Choose the look of the graphs
pio.templates.default = 'plotly_white'

Data Source

The data is downloaded from the official DOH COVID-19 Data Drop.

In [865]:
# checker
# covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
In [870]:
# HOSPITAL - BED & MECH VENT
url = 'https://drive.google.com/file/d/1EgcpVQlCGZ3oKTynfvmkGd3-32GH4JUd/view?usp=sharing'
file_id = url.split('/')[5]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
df_hosp = pd.read_csv(csv_raw)
        
# df_hosp.head(5)

Data Analysis

PH Latest Statistics

In [873]:
total_cases = len(covid_ph)
recovered = len(covid_ph.loc[covid_ph.RemovalType=='Recovered'])
died = len(covid_ph.loc[covid_ph.RemovalType=='Died'])
active_cases = total_cases - recovered - died
percent_active = active_cases / total_cases
percent_recovered = recovered / total_cases
percent_died = died / total_cases
asof = pd.to_datetime(covid_ph.DateRepConf.unique().max()) # latest available data
new = len(covid_ph[covid_ph.DateRepConf == asof])

# print("As of", asof.date().strftime("%b %d, %Y"), ":")
# print("PH - TOTAL CONFIRMED CASES:", total_cases)
# print("- New Recorded Deaths:", covid_ph[(covid_ph.RemovalType == "Died") & (covid_ph.DateRepRem == asof)].shape[0])
# print("- New Recorded Recoveries:", covid_ph[(covid_ph.RemovalType == "Recovered") & (covid_ph.DateRepRem == asof)].shape[0])
# print("---------------------------------------------------------------------")
# print(f"Active Cases: {active_cases} ({100*percent_active:.1f}% of total cases)")
# print(f"Died: {died} ({100*percent_died:.1f}% of total cases)")
# print(f"Recovered: {recovered} ({100*percent_recovered:.1f}% of total cases)")
PH COVID-19 Tracker
As of May 16, 2020
           Average New Cases reported in the last 7 days: 242
           Average New Cases reported in the last 30 days: 221
In [876]:
covid_ph1 = covid_ph.copy()
covid_ph1['RemovalType'] = covid_ph1.RemovalType.fillna('Active')
data = covid_ph1[covid_ph1.RemovalType == 'Active'].groupby('HealthStatus').CaseCode.count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
data['Percent'] = data['Count']/data['Count'].sum()*100
data['Pct'] = round(data['Percent'], 1).astype(str)+'%'
data['Cum_percent'] = data.Percent.cumsum()

# a = pd.Series(['Critical', 'Severe', 'Mild', 'Asymptomatic']).reset_index().rename({0: 'HealthStatus'}, axis=1)
a = pd.Series(['Asymptomatic', 'Mild', 'Severe', 'Critical']).reset_index().rename({0: 'HealthStatus'}, axis=1)
data1 = pd.merge(data, a, how='left', on='HealthStatus') 
data1 = data1.sort_values(by='index', ascending=False)
data1['Count_percent'] = data['Count'].astype(str) + " (" + data1['Pct'] + ")"

fig = px.bar(data1, x='Count', y='HealthStatus', orientation='h',  
             text='Count', hover_name='Pct', height=400)
fig.update_layout(title=("Health Status of Currently Active Cases"), showlegend=False,
                  hoverlabel=dict(font_size=17, bgcolor='blue'), hovermode='y', margin=dict(l=200))
fig.update_traces(textposition='outside', textfont_size=15)
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, title=None,
                 range=[0, data1.Count.sum()])
fig.update_yaxes(title=None, tickfont_size=15)
fig.show()
In [877]:
topcity = covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
# topcity.remove('For Validation')
# topcity.append('For Validation')
topcity

data01 = pd.DataFrame()
for i in list(reversed(topcity)):
    df = covid_ph[covid_ph.CityMunRes == i].groupby('RemovalType').count()['CaseCode'].reset_index()
    df = df.sort_values('RemovalType')
    df['City'] = i
    df = df.rename({'CaseCode': 'Count0', 'RemovalType': 'Type'}, axis=1)
    df['Percent'] = df['Count0'] / df['Count0'].sum()*100
    df['Pct'] = round(df['Percent'], 1).astype(str)+'%'
    data01 = data01.append(df)
    
data01['Count'] = data01.Count0.astype(int).apply(lambda x : "{:,}".format(x))
cases_ncr = covid_ph[covid_ph.RegionRes == 'NCR'].shape[0]

fig = px.bar(data01, x='Percent', orientation='h', color='Type', 
             title="Total Confirmed Cases in <b>NCR</b>: " + f"<b>{cases_ncr}</b>" + " (" + str(round(cases_ncr/total_cases*100,1)) + "% of Total)",
             text='Count', hover_name='Count', height=800)
fig.update_layout(legend=dict(orientation='h', x=0.52, y=1, title="", font_size=12), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=8))
for i in range(0, len(data01.City.unique())):
    # City
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text=data01.City.unique()[i] + " "*15), font_size=15)
    
    # Death Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=104, y=i,
                            text=data01[(data01.Type == 'Died') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[1])

    # Recovery Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=119, y=i,
                            text=data01[(data01.Type == 'Recovered') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[2])

    # Total Confirmed Cases
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text="{:,}".format(data01[data01.City == list(reversed(topcity))[i]].Count0.sum())+" "*3), 
                       font_size=14)

# Death Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=103, y=17,
                        text="Fatality<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[1])

# Recovery Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=118, y=17,
                        text="Recovery<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[2])

# Total Confirmed Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=-1, y=17,
                        text="Confirmed<br>Cases"), 
                   font_size=11)

fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

Daily Cases

In [878]:
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique()).rename({0: 'Date'}, axis=1)

c = cases_daily.reset_index().rename({'DateRepConf': 'Date', 'CaseCode': 'Count'}, axis=1)
c = pd.merge(base, c, how='left', on='Date')
c['Type'] = 'Reported Confirmed Cases'
c.Count.fillna(0)
c['Cum_count'] = c.Count.cumsum()
c['Percent_change'] = c.Count.pct_change()*100
c['%Change'] = c.Cum_count.pct_change()*100

d = deaths_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1)
d = pd.merge(base, d, how='left', on='Date')
d['Type'] = 'Reported Deaths'
#d = deaths_daily.reset_index().rename({'DateDied': 'Date', 'CaseCode': 'Count'}, axis=1)
#d['Type'] = 'Number of Actual Deaths'
d.Count = d.Count.fillna(0)
d['Cum_count'] = d.Count.cumsum()
d['Percent_change'] = d.Count.pct_change()*100
d['%Change'] = d.Cum_count.pct_change()*100

r = recoveries_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1) 
r = pd.merge(base, r, how='left', on='Date')
r['Type'] = 'Reported Recoveries'
#r = recoveries_daily.reset_index().rename({'DateRecover': 'Date', 'CaseCode': 'Count'}, axis=1)
#r['Type'] = 'Number of Actual Recoveries'
r.Count = r.Count.fillna(0)
r['Cum_count'] = r.Count.cumsum()
r['Percent_change'] = r.Count.pct_change()*100
r['%Change'] = r.Cum_count.pct_change()*100

import statsmodels.api as sm

cycle, trend1 = sm.tsa.filters.hpfilter(c['Count'], 7)
c['trend_HP'] = trend1
cycle, trend2 = sm.tsa.filters.hpfilter(d['Count'], 7)
d['trend_HP'] = trend2
cycle, trend3 = sm.tsa.filters.hpfilter(r['Count'], 7)
r['trend_HP'] = trend3

c['trend_7MA'] = c['Count'].rolling(7).mean()
d['trend_7MA'] = d['Count'].rolling(7).mean()
r['trend_7MA'] = r['Count'].rolling(7).mean()

c['trend_7MA'] = c['trend_7MA'].fillna(" ")

dr = pd.concat([r,d])

dr['trend_7MA_name'] = 'Trend (7-day MA)'
dr['trend_HP_name'] = 'Trend (HP Filter)'

d['trend_7MA_name'] = 'Trend (7-day MA)'
d['trend_HP_name'] = 'Trend (HP Filter)'
r['trend_7MA_name'] = 'Trend (7-day MA)'
r['trend_HP_name'] = 'Trend (HP Filter)'
c['trend_7MA_name'] = 'Trend (7-day MA)'
c['trend_HP_name'] = 'Trend (HP Filter)'

Cumulative Cases

Cumulative Cases In Log Scale
Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.

Percent Change on Daily Cases

In [885]:
# Compute trend based on Hodrick-Prescott Filter
import statsmodels.api as sm

c1 = c[c.Date >= '2020-03-15']
d1 = d[d.Date >= '2020-03-15']
r1 = r[r.Date >= '2020-03-15']

c1 = c1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(c1['%Change'], 7)
c1['trend'] = trend

d1 = d1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(d1['%Change'], 7)
d1['trend'] = trend

r1 = r1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(r1['%Change'], 7)
r1['trend'] = trend
-------------------------------- Confirmed Cases --------------------------------
1-day Percent Growth: 1.8%
3-day Average Percent Growth: 1.9%
7-day Average Percent Growth: 2.1%
--------------------------------Reported Deaths--------------------------------
1-day Percent Growth: 1.4%
3-day Average Percent Growth: 1.9%
7-day Average Percent Growth: 2.2%
In [890]:
print('-------------------------------- Reported Recoveries --------------------------------')
print('1-day Percent Growth:', str(round(r1['%Change'].tail(1).values[0],1))+'%')
print('3-day Average Percent Growth:', str(round(r1['%Change'].tail(3).mean(),1))+'%')
print('7-day Average Percent Growth:', str(round(r1['%Change'].tail(7).mean(),1))+'%')
-------------------------------- Reported Recoveries --------------------------------
1-day Percent Growth: 4.1%
3-day Average Percent Growth: 4.4%
7-day Average Percent Growth: 4.8%
In [891]:
# Figure 3
r1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(r1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["limegreen"])

fig2 = px.line(r1, x='Date', y='%Change', color='Type', 
               color_discrete_sequence=["limegreen"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Recoveries"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(d1.Date.unique().max()), 
#                          y=round(r1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(r1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='limegreen'))
fig2.show()

Active-Death-Recovery Trend

In [892]:
active_daily = np.maximum(cases_daily - recoveries_daily  - deaths_daily, 0).fillna(0)

active_rate_daily = (active_daily.cumsum() / cases_daily.cumsum())*100
death_rate_daily = (deaths_daily.cumsum() / cases_daily.cumsum())*100
recovery_rate_daily = (recoveries_daily.cumsum() / cases_daily.cumsum())*100

Patients Profile

Sex

*Note: There are 0 cases with no sex information

Age

*Note: There are 32 ( 0.26% ) cases with no age information
In [900]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df = pd.concat([df_recovered, df_died])

# Recovered Patients
fig1 = px.histogram(df_recovered, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Recovered Patients')
fig1.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),                   
                  xaxis=dict(tickmode='linear', dtick=10))

fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_recovered[df_recovered.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_recovered[df_recovered.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig1.show()

fig2 = px.histogram(df_died, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Died Patients')
fig2.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),
                   xaxis=dict(tickmode='linear', dtick=10))

fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_died[df_died.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_died[df_died.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig2.show()

Residence

In [901]:
df_regres = covid_ph.groupby('ProvRes').count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres['PH'] = 'Philippines'
df_regres.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres['Percentage'] = round((df_regres.Count / df_regres.Count.sum())*100,1).astype('str')+'%' 
df_regres = df_regres[df_regres.ProvRes != 'For Validation']

df_regres1 = covid_ph.groupby(['ProvRes', 'CityMunRes']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres1['PH'] = 'Philippines'
df_regres1.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres1['Percentage'] = round((df_regres1.Count / df_regres1.Count.sum())*100,1).astype('str')+'%' 
df_regres1 = df_regres1[df_regres1.ProvRes != 'For Validation']
df_regres1 = df_regres1[df_regres1.CityMunRes != 'For Validation']
In [902]:
top = 10
fig1 = px.bar(df_regres.head(top), x="Count", y="ProvRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="Provinces with Most Cases")
fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'), 
                   showlegend = False, legend_orientation="v")
fig2 = px.bar(df_regres1.head(top), x="Count", y="CityMunRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="City/Municipalities with Most Cases")
fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'),
                   showlegend = False, legend_orientation="v")
#fig1.show()
#fig2.show()

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Provinces with Most Cases", "Cities with Most Cases"))
fig000.add_trace(fig1.data[0], row=1, col=1)
fig000.add_trace(fig2.data[0], row=1, col=2)
fig000.update_layout(yaxis1=dict(categoryorder='total ascending'), yaxis2=dict(categoryorder='total ascending'), height=500)
fig000.update_xaxes(range=(0, round(max(df_regres.Count) + max(df_regres.Count)/4)), row=1, col=1)
fig000.update_xaxes(range=(0, round(max(df_regres1.Count) + max(df_regres1.Count)/4)), row=1, col=2)
fig000.update_layout(xaxis={"domain": [0.05, 0.40]}, xaxis2={"domain": [0.6, 1]},
                     hoverlabel=dict(font_size=15))
fig000.show()
In [903]:
z1 = covid_ph[covid_ph.RegionRes.isnull()].shape[0]
z2 = covid_ph[covid_ph.ProvRes == 'For Validation'].shape[0]

print('Note:')
print('1. There are', z1, 'cases no information about his/her residence.')
print('2. There are', z2, '('+str(round(z2/covid_ph.shape[0],1)*100)+'%'+')' , 'cases with region of origin but no specific province/city of residence. These are currently tagged as "For Validation".')
Note:
1. There are 47 cases no information about his/her residence.
2. There are 0 (0.0%) cases with region of origin but no specific province/city of residence. These are currently tagged as "For Validation".
In [906]:
x = []
for i in range(0, len(geojson['features'])):
    z = list(geojson['features'][i].values())[1]['PROVINCE']
    x.append(z)
p = pd.DataFrame(x).rename({0: 'Province'}, axis=1)

df_regres0 = df_regres.copy()
df_regres0['ProvRes'] = df_regres0['ProvRes'].str.title()
df_regres0['ProvRes'] = df_regres0['ProvRes'].replace({'Metro Manila': 'Metropolitan Manila',
                                                       'Davao Del Sur': 'Davao del Sur',
                                                       'Zamboanga Del Sur': 'Zamboanga del Sur',
                                                       'Davao Del Norte': 'Davao del Norte',
                                                       'Lanao Del Sur': 'Lanao del Sur',
                                                       'Lanao Del Norte': 'Lanao del Norte',
                                                       'Agusan Del Norte': 'Agusan del Norte',
                                                       'Cotabato': 'North Cotabato', 
                                                       'Cebu Province': 'Cebu', 
                                                       'Iloilo Province': 'Iloilo', 
                                                       'Tarlac Province': 'Tarlac'})

df_regres0 = df_regres0.reset_index(drop=True)

# CHECKER
prov = pd.merge(p, df_regres0, how='outer', left_on='Province', right_on='ProvRes')
prov['Count_log'] = np.log10(prov['Count'])
prov['Count_log'] = prov['Count_log'].fillna(0)
prov['Count'] = prov['Count'].fillna(0)
prov.tail(10)
Out[906]:
Province ProvRes Count PH Percentage Count_log
73 Sulu Sulu 1.0 Philippines 0.0% 0.000000
74 Surigao del Norte NaN 0.0 NaN NaN 0.000000
75 Surigao del Sur NaN 0.0 NaN NaN 0.000000
76 Tarlac Tarlac 32.0 Philippines 0.3% 1.505150
77 Tawi-Tawi NaN 0.0 NaN NaN 0.000000
78 Zambales Zambales 26.0 Philippines 0.2% 1.414973
79 Zamboanga del Norte NaN 0.0 NaN NaN 0.000000
80 Zamboanga del Sur Zamboanga del Sur 126.0 Philippines 1.1% 2.100371
81 Zamboanga Sibugay NaN 0.0 NaN NaN 0.000000
82 NaN Davao Occidental 1.0 Philippines 0.0% 0.000000

Geo Mapping

In [907]:
fig = px.choropleth(prov, geojson=geojson, color="Count_log",
                    locations="Province", featureidkey="properties.PROVINCE",
                    hover_name='Count',
                    projection="mercator", 
                    labels={'Count_log': 'log(Count)'}, 
                    title='Number of Covid-19 Confirmed Cases in the Philippines (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" , 
                    color_continuous_scale=px.colors.sequential.OrRd,  
                    height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
In [908]:
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA'].reset_index(drop=True)
df_ncr['CityMunRes'] = df_ncr['CityMunRes'].replace({'QUEZON CITY': 'Quezon City',
                         'CITY OF MANILA': 'Manila', 
                         'CITY OF PARAÑAQUE': 'Parañaque',
                         'CITY OF MAKATI': 'Makati City', 
                         'CITY OF MANDALUYONG': 'Mandaluyong', 
                         'CITY OF PASIG': 'Pasig City', 
                         'TAGUIG CITY': 'Taguig', 
                         'CITY OF SAN JUAN': 'San Juan', 
                         'CALOOCAN CITY': 'Kalookan City',
                         'PASAY CITY': 'Pasay City', 
                         'CITY OF LAS PIÑAS': 'Las Piñas',
                         'CITY OF MUNTINLUPA': 'Muntinlupa', 
                         'CITY OF MARIKINA': 'Marikina', 
                         'CITY OF VALENZUELA': 'Valenzuela', 
                         'CITY OF MALABON': 'Malabon', 
                         'CITY OF NAVOTAS': 'Navotas', 
                         'PATEROS': 'Pateros'})

# CHECKER
z = []
for i in range(0, len(geojson1['features'])):
    y = list(geojson1['features'][i].values())[1]['NAME_2']
    z.append(y)
p = pd.DataFrame(z).rename({0: 'City'}, axis=1)
p

check = pd.merge(df_ncr, p, how='left', left_on='CityMunRes', right_on='City')
# check

Cumulative Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

In [910]:
print("As of", asof.date().strftime("%b %d, %Y")) 
top = 10
topreg = df_regres.head(top).ProvRes.tolist()

df_regres_daily = covid_ph.groupby(['ProvRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres_daily.sort_values(by=['ProvRes','DateRepConf'], inplace=True)
#df_regres_daily['ProvRes'] = df_regres_daily['ProvRes'].replace({'NCR': 'Metropolitan Manila'})
#df_regres_daily['Cum_count'] = df_regres_daily.groupby('ProvRes')['CaseCode'].cumsum()
df_regres_daily = df_regres_daily[df_regres_daily.ProvRes != 'For Validation']

df_regres_daily1 = pd.DataFrame()
for i in topreg:
    a = df_regres_daily[df_regres_daily.ProvRes == i].rename({'CaseCode': 'Count'}, axis=1)
    
    base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
    base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
    # base = base[base.DateRepConf >= '2020-03-06']
    
    q = pd.merge(base, a, how='left', on='DateRepConf')
    q['ProvRes'] = i
    q['Count'] = q.Count.fillna(0)
    q['Cum_count'] = q.Count.cumsum()
#     q['Percent_change'] = q.Cum_count.pct_change()*100
    
    df_regres_daily1 = df_regres_daily1.append(q)

df_regres_daily1

# Figure 1
fig0 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count', 
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases")
fig0.update_layout(showlegend=False) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
for i in topreg[:2]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*10 + i, #PLEASE EDIT IF NEEDED
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig0.show()

# Figure 2
fig1 = px.line(df_regres_daily1[~df_regres_daily1.ProvRes.isin(topreg[:2])], x='DateRepConf', y='Cum_count',
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases (excluding NCR & Cebu Province)", 
               color_discrete_sequence=px.colors.qualitative.Plotly[2:])
fig1.update_layout(showlegend=False) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[1:2]:
#     yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*8 + i,
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[2:5]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[5:6]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[6:7]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[7:8]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[8:9]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[9:10]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy-5,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy-5,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))       
fig1.show()
As of May 16, 2020

NCR Cities

Daily Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

Note: Trendline (in blue) is based on 7-day Moving Average.

In [915]:
# import os

# if not os.path.exists("images"):
#     os.mkdir("images")

# fig000.write_image("images/provinces.png")
py.plot(fig000, filename='daily_cases_Provinces.html')
Out[915]:
'daily_cases_Provinces.html'
In [916]:
# prov = ['TARLAC PROVINCE']
# fig000 = make_subplots(rows=5, cols=2, subplot_titles=prov)

# for i in prov: # EVEN NUMBER
#     df = df_regres_daily[df_regres_daily.ProvRes == i].reset_index(drop=True)
#     df = df.rename({'CaseCode': 'Count'}, axis=1)
#     cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
#     df['Trend_HP'] = trend1
#     df['Trend_MA7'] = df['Count'].rolling(7).mean()
#     df['Trend_MA3'] = df['Count'].rolling(3).mean()
#     df = df[df.DateRepConf >= '2020-03-01']
    
#     fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
#                   labels={'DateRepConf': 'Date'},
#                   color_discrete_sequence=['lightblue'])
#     fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_HP', line_shape='spline', color_discrete_sequence=["blue"])
    
#     fig000.add_trace(fig1.data[0], row=prov[::2].index(i)+1, col=1)
#     fig000.add_trace(fig1_trend.data[0], row=prov[::2].index(i)+1, col=1)

# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5), showlegend=False, height=2000,
#                      legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
#                      hoverlabel=dict(font_size=18))

    
# fig000.show()

NCR Cities

Note: Trendline (in blue) is based on 7-day Moving Average.

In [918]:
fig000 = make_subplots(rows=9, cols=2, subplot_titles=topcity)

for i in topcity[::2]: # ODD NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', 
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[::2].index(i)+1, col=1)
    fig000.add_trace(fig1_trend.data[0], row=topcity[::2].index(i)+1, col=1)
    
for i in topcity[1::2]: # EVEN NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline',
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[1::2].index(i)+1, col=2)
    fig000.add_trace(fig1_trend.data[0], row=topcity[1::2].index(i)+1, col=2)

fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES in NCR"), showlegend=False, height=3200,
                     legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
                     hoverlabel=dict(font_size=18))

    
fig000.show()
In [919]:
py.plot(fig000, filename='daily_cases_NCR.html')
Out[919]:
'daily_cases_NCR.html'

Testing Ratio

In [920]:
df_tests_tot = df_tests.groupby('Date')[['UNIQUE INDIVIDUALS TESTED',
                                         'POSITIVE INDIVIDUALS', 'NEGATIVE INDIVIDUALS', 
                                         'TOTAL TESTS CONDUCTED', 'REMAINING AVAILABLE TESTS']].sum()
df_tests_tot['INVALID'] = df_tests_tot['UNIQUE INDIVIDUALS TESTED'] - (df_tests_tot['POSITIVE INDIVIDUALS'] + df_tests_tot['NEGATIVE INDIVIDUALS'])
df_tests_tot['%POSITIVE'] = 100*(df_tests_tot['POSITIVE INDIVIDUALS'] / df_tests_tot['UNIQUE INDIVIDUALS TESTED'])
df_tests_tot['%NEGATIVE'] = 100*(df_tests_tot['NEGATIVE INDIVIDUALS'] / df_tests_tot['UNIQUE INDIVIDUALS TESTED'])
df_tests_tot['%INVALID'] = 100 - (df_tests_tot['%POSITIVE'] + df_tests_tot['%NEGATIVE'])
df_tests_tot['TOTAL TESTS AVAILABLE'] = df_tests_tot['TOTAL TESTS CONDUCTED'] + df_tests_tot['REMAINING AVAILABLE TESTS']
df_tests_tot = df_tests_tot.reset_index()
# df_tests_tot
In [921]:
# Figure 0
data = pd.melt(df_tests_tot[-1:], id_vars=['Date'],
               value_vars=['TOTAL TESTS CONDUCTED', 'REMAINING AVAILABLE TESTS'],
               var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'REMAINING AVAILABLE TESTS': 'REMAINING AVAILABLE TEST SUPPLIES'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data

q = 100*round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0],3)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio', 
             text='Count', hover_name='Pct', height=250, labels={'Percent': ''},
             color_discrete_sequence=['#66c2a5', '#84dbc0'])
fig.update_layout(legend=dict(orientation='h', title=""), 
                  title="Availability of Test Kits")
fig.update_layout(showlegend=True)

fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=15))
for i in range(0, len(data.Percent.unique())):
#     fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                             arrowhead=0, xanchor='right', yanchor='middle',
#                             ax=0, ay=0,
#                             x=data.Percent.cumsum().unique()[i]-0.001, 
#                             y=-0.5,
#                             text=data.Test_Ratio[i]), font_size=15,
#                        font_color = ['#66c2a5', '#84dbc0'][i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, 
                            y=0.5,
                            text=data.Pct[i]), font_size=15,
                       font_color = ['#66c2a5', '#84dbc0'][i])
    
print("As of", df_tests.Date.max().date().strftime("%b %d, %Y"))
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
As of May 14, 2020
Notes:
a. There are 435 or 0.2 % invalid/equivocal tests
b. There are more positive individuals tested vs publicly reported because some needs to be further validated
In [923]:
df = pd.melt(df_tests_tot, id_vars=['Date'],
            value_vars=['%NEGATIVE', '%POSITIVE', '%INVALID'], 
            var_name = 'Ratio', value_name = 'Percent')
fig = px.bar(df, x='Date', y='Percent', color='Ratio')
fig.update_layout(title=dict(x=0.5, text="Cumulative Testing Ratio on Unique Individuals"),
                  xaxis=dict(tickmode='linear', #tick0=4, 
                             dtick=0, tickfont_size=12, title="Date"),
                  yaxis=dict(tickfont_size=10),
                  legend=dict(orientation="h", title="",
                              x=0.58, y=0.97, yanchor="bottom"))

fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)
fig.update_layout(hovermode='x')

# Latest Rate
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%NEGATIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%NEGATIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[0]))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%POSITIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[1]))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%INVALID'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%INVALID'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=10, font_color=px.colors.qualitative.Plotly[2]))
fig.show()

print("Note: Daily testing ratio here is based on the total test conducted as of that date")
Note: Daily testing ratio here is based on the total test conducted as of that date
*Based on the current ratio of total positive cases over total conducted tests
In [925]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig1 = px.bar(df_tests_tot, x='Date', 
              # y='UNIQUE INDIVIDUALS TESTED',
              y='NEW UNIQUE TESTS',
              color_discrete_sequence=["#00CC96"])
fig2 = px.line(df_tests_tot, x='Date', 
               y='NEW %POSITIVE', 
               color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')

df_tests_tot['TREND_NAME'] = 'Trend (7-day MA)'
fig_trend = px.line(df_tests_tot, x='Date', 
               y='TREND', color='TREND_NAME',
               color_discrete_sequence=["red"])

fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}), secondary_y=True)

fig.update_yaxes(title="%Positive", tickfont_size=10, color='red',
                 range=[0, df_tests_tot['%POSITIVE'].max()*1.1], 
                 showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
                 #range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5], 
                 showgrid=True, tickfont_size=10, secondary_y=False)

fig.update_layout(title=dict(x=0.5, text="Estimated %Positive** on Daily Tests"),
                  xaxis=dict(tickmode='linear', #tick0=4,
                             dtick=0, tickfont_size=10, title="Date"),
                  yaxis=dict(dtick=1000), showlegend=True,
                  legend=dict(orientation="h", title="", 
                              x=0.7, y=1, yanchor="bottom"))

# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%POSITIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[1]))

fig.show()

print('**Caveat: %Positive is just computed based on the ratio of new positive cases reported over conducted tests on that day. The reported new positive cases were tested several days ago.') 
**Caveat: %Positive is just computed based on the ratio of new positive cases reported over conducted tests on that day. The reported new positive cases were tested several days ago.

Time of Recovery / Death

Most of these death cases probably needs to be validated if caused by Covid-19 before publicly reported.

Facility Capacity

In [928]:
# f.to_excel("./data/ph_list_of_facilities.xlsx")
In [929]:
df_hosp1 = df_hosp.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
df_hosp2 = df_hosp1.groupby('hfhudcode').first().reset_index()
df_hosp2

df_hosp3 = pd.merge(df_hosp2, f.iloc[:, :15], how='left', left_on='hfhudcode', right_on='HealthFacilityCode')
# df_hosp3

Total PH

Hospital Data as of 2020-05-16
Note:
- *These are only around 60% of PH's hospitals and infirmaries who reported on the DOH DataCollect App. Also, some hospitals & infirmaries don't have updated counts.
- There are 7 ( 0.4% ) facilities with data but no facility tagging from The National Health Facility Registry. Hence, dropped on this report.
In [931]:
data003 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
    data3 = df_hosp3.groupby(['Ownership Major Classification', 
                              'Health Facility Type'])[[i+'_v', 
                                                        i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType', 
                                                                                             'Health Facility Type': 'FacilityType'}, axis=1)
    data03 = pd.melt(data3, id_vars=['OwnershipType', 'FacilityType'], 
                     value_vars=[i+'_v', i+'_o'], var_name=['Availability'], 
                     value_name='Count')
    data03['Availability'] = data03['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
    data03['Type'] = i
    data003 = data003.append(data03)
    
data003['Type'] = data003['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds', 
                                           'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data003 = data003.reset_index(drop=True)
data003['Percent'] = data003.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data003['Pct'] = round(data003['Percent'],1).astype('str') + '%'
data003['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data003

##############################################################################

# TOTAL
data100 = data003.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)

fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', labels={'Percent': '', 'Type': ''})
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16), 
                  hoverlabel=dict(font_size=20),
                  title=("Availability of Beds & Mechanical Ventilators in <b>Total PH Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5, 
                               line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='center', yanchor='middle',
                            ax=0, ay=0,
                            x=i, 
                            y=-5,
                            text="Count: " + str(data003.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()
Breakdown by Facility Type
Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.

In NCR Only

In [933]:
df_hosp3_ncr = df_hosp3[df_hosp3['Region Name'] == 'NATIONAL CAPITAL REGION (NCR)']

data0033 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
    data33 = df_hosp3_ncr.groupby(['Ownership Major Classification', 
                                  'Health Facility Type'])[[i+'_v', 
                                                            i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType', 
                                                                                                 'Health Facility Type': 'FacilityType'}, axis=1)
    data033 = pd.melt(data33, id_vars=['OwnershipType', 'FacilityType'], 
                     value_vars=[i+'_v', i+'_o'], var_name=['Availability'], 
                     value_name='Count')
    data033['Availability'] = data033['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
    data033['Type'] = i
    data0033 = data0033.append(data033)
    
data0033['Type'] = data0033['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds', 
                                           'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data0033 = data0033.reset_index(drop=True)
data0033['Percent'] = data003.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data0033['Pct'] = round(data003['Percent'],1).astype('str') + '%'
data0033['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data0033

##############################################################################

# TOTAL
data100 = data0033.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)

fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', labels={'Percent': '', 'Type': ''})
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16), 
                  hoverlabel=dict(font_size=20),
                  title=("Availability of Beds & Mechanical Ventilators in <b>NCR Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5, 
                               line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='center', yanchor='middle',
                            ax=0, ay=0,
                            x=i, 
                            y=-5,
                            text="Count: " + str(data0033.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()
Facilities by Cities in NCR
Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.
In [935]:
# df_ppe1 = df_ppe.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
# df_ppe2 = df_ppe1.groupby('hfhudcode').first().reset_index()
# df_ppe2

# print("Availability of PPEs in Different Hospitals:")
# print("- Coverall:", df_ppe2.coverall.sum())
# print("- Face Shield:", df_ppe2.face_shield.sum())
# print("- Goggles:", df_ppe2.goggles.sum())
# print("- Gown:", df_ppe2.gown.sum())
# print("- N95 Mask:", df_ppe2.n95mask.sum())
# print("- Shoe Cover:", df_ppe2.shoe_cover.sum())
# print(" ")
# print("- Head Cover:", df_ppe2.head_cover.sum())
# print("- Surgical Mask:", df_ppe2.surgmask.sum())
# print("- Gloves:", df_ppe2.gloves.sum())

R(t) Estimation using Bayesian Approach

Main Reference: Github by Kevin Systrom

In any epidemic, $R_t$ is the measure known as the effective reproduction number. It's the number of people who become infected per infectious person at time $t$. The most well-known version of this number is the basic reproduction number: $R_0$ when $t=0$. However, $R_0$ is a single measure that does not adapt with changes in behavior and restrictions.

As a pandemic evolves, increasing restrictions (or potential releasing of restrictions) changes $R_t$. Knowing the current $R_t$ is essential. When $R\gg1$, the pandemic will spread through a large part of the population. If $R_t$ $<1$, the pandemic will slow quickly before it has a chance to infect many people. The lower the $R_t$: the more manageable the situation.

Therefore, we want achieve a $R_t$ $<1$ which means that the pandemic in an area is under control.

In [936]:
def prepare_cases(cases):
    new_cases = cases.diff()

    smoothed = new_cases.rolling(7,
        win_type='gaussian',
        min_periods=1,
        center=True).mean(std=2).round()
    
    zeros = smoothed.index[smoothed.eq(0)]
    if len(zeros) == 0:
        idx_start = 0
    else:
        last_zero = zeros.max()
        idx_start = smoothed.index.get_loc(last_zero) + 1
    smoothed = smoothed.iloc[idx_start:]
    original = new_cases.loc[smoothed.index]
    
    return original, smoothed

We will start the computation on March 5, 2020 - the date with last zero new case followed by a volume of new cases. To get the trend, Gaussian filter was applied to arrive with a smoothed time series. This will be our basis in calculating for each days' likelihood and posteriors as part of the Bayesian approach in estimating $R_t$. If you want to know more about the methodology, check out the work by Kevin Systrom.

In [937]:
# cases = cases_daily.cumsum()
cases = cases_daily[35:].cumsum() # March 5, 2020
cases = cases_daily.cumsum()
original, smoothed = prepare_cases(cases)

# original.plot(title=f"PH Daily New Cases",
#               c='k',
#               linestyle=':',
#               alpha=.5, 
#               label='Actual',
#               legend=True,
#               figsize=(1200/72, 600/72)
#              )

# ax = smoothed.plot(label='Smoothed', color='#0084ff', legend=True)
# ax.get_figure().set_facecolor('w')

Next, since our results include uncertainty, we'd like to be able to view the most likely value of $R_t$ along with its highest-density interval (HDI). HDI indicates which points of a distribution are most credible, and which cover most of the distribution. It is similar to a confidence interval where actual value mostly like falls on a range given a probability ($p$). For this analysis, we will be defining the High HDI and Low HDI given $p=0.9$.

In [940]:
def highest_density_interval(pmf, p=.9):
    # If we pass a DataFrame, just call this recursively on the columns
    if(isinstance(pmf, pd.DataFrame)):
        return pd.DataFrame([highest_density_interval(pmf[col], p=p) for col in pmf],
                            index=pmf.columns)
    
    cumsum = np.cumsum(pmf.values)
    best = None
    for i, value in enumerate(cumsum):
        for j, high_value in enumerate(cumsum[i+1:]):
            if (high_value-value > p) and (not best or j<best[1]-best[0]):
                best = (i, i+j+1)
                break
            
    low = pmf.index[best[0]]
    high = pmf.index[best[1]]
    return pd.Series([low, high], index=[f'Low_{p*100:.0f}', f'High_{p*100:.0f}'])
Estimated Rt Low_90 High_90
DateRepConf
2020-05-12 1.17 0.79 1.60
2020-05-13 1.16 0.76 1.55
2020-05-14 1.08 0.70 1.49
2020-05-15 0.98 0.60 1.39
2020-05-16 0.92 0.52 1.31
In [943]:
fig = px.line(most_likely, y=most_likely.values, x=most_likely.index)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Effective Reproduction - R(t)')

fig.update_layout(title=dict(x=0.5, text='PH Estimated R(t) by Day'), 
                  showlegend=True,
                  legend=dict(orientation="v", title="",
                              x=0.8, y=0.95, yanchor="top"), 
                  hoverlabel=dict(font_size=18))

fig.add_scatter(x=hdis['Low_90'].index, y=hdis['Low_90'].values, 
                fill='tonexty', line=dict(color='lightgray'), name='HDI (Low @ 90%)')
fig.add_scatter(x=hdis['High_90'].index, y=hdis['High_90'].values, 
                fill='tonexty', line=dict(color='lightgray'), name='HDI (High @ 90%)')
fig.add_scatter(y=most_likely.values, x=most_likely.index, line=dict(color='blue'), name='<b>R(t)</b>')
fig.add_scatter(y=[1]*most_likely.index.shape[0], x=most_likely.index, 
                line=dict(color='orange', dash='dash'), name='R(t) = 1')

fig.show()

Metro Manila, Cebu & Other Provinces

In [946]:
def prepare_cases(cases):
    new_cases = cases.diff()

    smoothed = new_cases.rolling(7,
        win_type='gaussian',
        min_periods=1,
        center=True).mean(std=2).round()
    
    zeros = smoothed.index[smoothed.eq(0)]
    if len(zeros) == 0:
        idx_start = 0
    else:
        last_zero = zeros.max()
        idx_start = smoothed.index.get_loc(last_zero) + 1
    smoothed = smoothed.iloc[idx_start:]
    original = new_cases.loc[smoothed.index]
    
    return original, smoothed
In [947]:
results_ncr = pd.DataFrame()
for i in topcity[:-3]:
    df0 = df_city_daily1[df_city_daily1['CityMunRes'] == i] 
    df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
    df1 = df.set_index('DateRepConf')
    df1 = df1.iloc[:,0].astype('int64')
    
    idx = pd.date_range('2020-03-05', df1.index[-1].date())
    df1.index = pd.DatetimeIndex(df1.index)
    df1 = df1.reindex(idx, fill_value=0)
    df1

    original, smoothed = prepare_cases(df1)

    posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

    # Note that this takes a while to execute - it's not the most efficient algorithm
    hdis = highest_density_interval(posteriors, p=.9)
    most_likely = posteriors.idxmax().rename('Estimated Rt')

    # Look into why you shift -1
    result_ncr = pd.concat([most_likely, hdis], axis=1)
    result_ncr['CityMunRes'] = i
    results_ncr = results_ncr.append(result_ncr)
Note: Malabon, Navatos and Pateros can't be estimated well due to small number of cases

Log-Ratio: New Cases vs Total Cumulative Cases

The growth started to slow down during the first week of April & currently moving (fluctuating) sideways. But we hope to see the trend to go down.

Doubling Time

Doubling time is the defined as the length of time for the present value of variable to double given the prevailing rate of increase. It is based on the compound accumulation model. Here's the formula for doubling time ${d_{t,y}}$:

${d_{t,y}}$ = $\frac{ln (2)}{ln(1+P_{t,y})}$

where $P_{t,y}$ is the percent change of a value (i.e. Covid-19 total cases) at any time ${t}$

Source: https://drive.google.com/file/d/1vQ1V283pcl9ttIfyJaAC8ZSM3slSnew9/view?fbclid=IwAR2fjxAVDSYQQYWr209EPUGdSp7GlxLaRhHH4-G4_3065s4M9IvVatUo6zE

In [950]:
# "Epidemic doubling times characterize the sequence of intervals at which the cumulative incidence doubles (3). If an epidemic is growing exponentially with a constant growth rate ${r}$, the doubling time remains constant and equals $\frac{ln 2}{r}$. An increase in the doubling time indicates a slowdown in transmission if the underlying reporting rate remains unchanged." 
# Source: https://wwwnc.cdc.gov/eid/article/26/8/20-0219_article

# To calculate the doubling time for a population (e.g. COVID-19 cases), use the formula = $\frac{x ln(2)}{ln \frac{y}{z}$, where

# - x is the time that has passed since you started measuring. For example, if the number of cases went from 500 on day 0 to 1000 on day 2, x is 2.
# - y is the number of cases on day x, e.g 1000 on day 2.
# - z is the number of cases on day 0, e.g. 500."

# Source: https://blog.datawrapper.de/weekly-chart-coronavirus-doublingtimes/
In [951]:
# # version1
# c2 = c1.copy()
# c2['doubling_time1'] = np.log(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log(2) / (c2['trend']/100)

# # version2
# c2 = c1.copy()
# c2['doubling_time1'] = np.log10(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log10(2) / (c2['trend']/100)

# version3
c2 = c1.copy()
c2['doubling_time1'] = np.log(2) / np.log(1 + (c2['%Change']/100))
c2['doubling_time2'] = np.log(2) / np.log(1 + (c2['trend']/100))

# version3.1
# c2['doubling_time3'] = c2['doubling_time1'].rolling(7).mean()
# cyclex, trendx = sm.tsa.filters.hpfilter(c2['doubling_time1'], 7)
# c2['doubling_time4'] = trendx

c2['Days'] = 'Doubling Time'
c2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend1 = px.line(c2, x='Date', y='doubling_time2', color='Trend_name1', 
                     color_discrete_sequence=["cadetblue"], 
                     labels={'doubling_time2': 'Days-To-Double'})
# c2['Trend_name2'] = 'Trend (based on 7-day rolling mean)'
# fig_trend2 = px.line(c2, x='Date', y='doubling_time4', color='Trend_name2', 
#                      color_discrete_sequence=["cadetblue"])

fig2 = px.line(c2, x='Date', y='doubling_time1', color='Days',
               labels={'doubling_time1': 'Days-To-Double'},
               color_discrete_sequence=["lightblue"])

fig2.add_trace(fig_trend1.data[0].update(line={'dash': 'dash'}))
# fig2.add_trace(fig_trend2.data[0].update(line={'dash': 'dashdot'}))

fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Confirmed Cases for PH"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.1, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))
fig2.update_layout(shapes=[dict(type='line', 
                                yref='y', y0=30, y1=30, 
                                xref='paper', x0=0, x1=1, 
                                line=dict(color="orange", width=1))])
fig2.show()

PH vs ASEAN Countries

Source: https://www.worldometers.info/coronavirus/

As of May 18, 2020 07:34:12
In [ ]: